	
	
	*  	This file creates a dataset with all candidates who ran for the state legislature
	*  	in states with term limits (except LA) between 1984 and 2014
	
project, original("../data_input/185slers1967to2016_20180529.dta")

	* Load the Klarner data
	use "../data_input/185slers1967to2016_20180529.dta", clear

	* Harmonize names and codings.
	* Statement order is kept deliberately: source variables may be referenced by
	* abbreviation, so new names are only introduced after the old ones were read.
	rename candid CandId
	gen CandName = upper(cand)
	* "senate" when sen==1, "house" in every other case
	gen chamber = cond(sen==1, "senate", "house")
	gen state = upper(sab)
	* Replace the original party variable with a three-way label built from partyz
	drop party
	gen str5 party = cond(partyz=="d", "Dem", cond(partyz=="r", "Rep", "Other"))
	rename termz TermLengthActual
	* win is 1 when outcome is "w" and 0 otherwise
	gen win = (outcome=="w")
	rename votes candidatevotes
	rename dno district
	rename ddez districtidentifier
	rename dseats NumberOfSeats
	
	
	* Fix errors in Klarner.
	* - Where one CandId covers two different people, the later person's records
	*   are moved to the negated id.
	* - Where a record carries a misspelled name under a separate id, the name is
	*   corrected and the id is recoded.
	* Statement order within each name/id pair matters: every condition tests the
	* id as it stands at that point in the script.
	replace CandId = -13234 if CandId == 13234 & year>=2012 // two different candidates: Ian Calderon is the son of Charles Calderon (NOTE(review): this was labelled MI; presumably CA -- confirm)
	replace CandName = "CALDERON, IAN" if CandId == -13234 // upper case, consistent with CandName = upper(cand)
	replace CandId = -286792 if CandId == 286792 & year>=2014 // two different MI candidates: Fred Durhal jr. and Fred Durhal III
	replace CandId = 267043 if CandId==290458 & year==2008 // Mike Brown is listed as Mike Borwn
	replace CandName = "BROWN, MIKE" if CandId==267043 & year==2008 // Mike Brown is listed as Mike Borwn
	replace CandId = 291705 if CandId==305390 & year==2010 // no reason recorded for this recode
	replace CandName = "BLESSING, LOUIS W. III" if CandId==179983 & inlist(year, 2012, 2014) // LOUIS BLESSING III was the son of LOUIS BLESSING JR.
	replace CandId = -179983 if CandId==179983 & inlist(year, 2012, 2014) // LOUIS BLESSING III was the son of LOUIS BLESSING JR.
	replace CandName = "BOYD, JANINE" if CandId==183245 & year==2014 // Janine Boyd was the daughter of Barbara Boyd
	replace CandId = -183245 if CandId==183245 & year==2014 // Janine Boyd was the daughter of Barbara Boyd
	replace CandName = "BRANAE, GARY" if CandId==129366 & year==2000 // misspelled name in Klarner
	replace CandId = 129326 if CandId==129366 & year==2000
	replace CandName = "YARBROUGH, KEN" if CandId==222159 & year==1990 // misspelled name in Klarner: VARBROUGH, KEN (correct is YARBROUGH, KEN)
	* NOTE(review): the name fix above tests year==1990 but the id recode below
	* tests year==2000; every other name/id pair uses the same year in both
	* statements. One of the two years is presumably a typo -- confirm against the
	* raw data before changing either.
	replace CandId = 222161 if CandId==222159 & year==2000
	replace CandName = "MARTINEZFISCHER, TREY" if CandId==305858
	replace CandId = 221489 if CandId==305858
	replace CandName = "MALLORYCARAWAY, BARBARA" if CandId==305848
	replace CandId = 280363 if CandId==305848
	replace CandName = "YOST, JERRY" if CandId==226120
	replace CandId = 226142 if CandId==226120
	replace CandName = "DORSEYWELCH, YVONNE" if CandId==312287
	replace CandId = 81524 if CandId==312287


	* Sample restrictions
	* Years: we do not include the 2016 election because we don't have outcome data for the 2017-2019 sessions
	keep if inrange(year, 1978, 2014)
	* States: any state whose abbreviation matches the list below
	keep if regexm(state, "AR|AZ|CA|CO|FL|LA|ME|MI|MO|MT|NV|OH|OK|SD|TX|NY")
	*keep if etype=="g" | etype=="lafsettled" //	keep only general elections
	* Contests: when deter==1, the winner of the electoral contest takes a seat in the legislative chamber
	keep if deter==1
	* Chambers: every "house" record, plus "senate" records for AZ, ME and LA only
	keep if chamber=="house" | (chamber=="senate" & inlist(state, "AZ", "ME", "LA"))
	
	* Side file: earliest and latest election year per candidate, stored in one row
	* per CandId with a separate pair of columns for each chamber value.
	* preserve/restore leaves the candidate-level data in memory untouched.
	preserve
		collapse (min) first_run_=year (max) last_run_=year, by(CandId chamber)
		reshape wide first_run_ last_run_, i(CandId) j(chamber) string
		save "../data_output/first_last_run.dta", replace
project, creates("../data_output/first_last_run.dta")
	restore
	
	* Collapse to one record per candidate-year, compute vote shares, and save the
	* candidate-level file.
	* NotRepOrDem is 1 when party is "Other" (neither "Dem" nor "Rep"), 0 otherwise.
	gen NotRepOrDem = party=="Other"
	* The sort fixes which record supplies the (first) values in the collapse below.
	* NOTE(review): party precedes NotRepOrDem in the sort, and NotRepOrDem is a
	* function of party, so NotRepOrDem never changes the order; ties are resolved
	* alphabetically ("Dem" < "Other" < "Rep"). If the intent was to prefer a
	* major-party label, the order would need to be NotRepOrDem party -- confirm.
	sort CandId year party NotRepOrDem
	* Keep one observation per candidate-year: votes are summed across the
	* candidate's records, all other attributes come from the first record.
	collapse (sum) candidatevotes (first) state chamber district districtidentifier CandName TermLengthActual NumberOfSeats win party, by(  CandId   year)
	* Vote share in percent of all votes recorded in the same
	* state-chamber-district-year.
	bysort state chamber  districtidentifier year: egen totalvotes = total(candidatevotes)
	gen vtsh = 100*candidatevotes/totalvotes
	* A missing share is set to 100 (presumably contests with no recorded votes,
	* where the division yields missing -- confirm).
	replace vtsh = 100 if vtsh==.
	drop totalvotes	
	
	
		
	save ../data_output/candidates.dta, replace
project ,  creates("../data_output/candidates.dta")

	
	
	* Build, for every candidate, a ";"-terminated list of the election years in
	* which they ran, with one list per chamber value
	use ../data_output/candidates.dta, clear

	keep CandId chamber year
	* number each candidate's elections within a chamber from first to last
	bysort CandId chamber (year): gen n = _n
	summarize n
	local n_slots = r(max) // number of elections of the candidate who ran in the most elections
	* one column per election slot: year1, year2, ...
	reshape wide year, i(CandId chamber) j(n)
	tostring year*, replace
	forvalues k = 1/`n_slots' {
		* an unused slot (".") becomes empty; a used slot gets a trailing ";"
		replace year`k' = cond(year`k'==".", "", year`k' + ";") if year`k'!=""
	}
	* glue the slots together into a single string and drop the pieces
	egen elections_run = concat(year*)
	drop year*
	* one row per candidate; the chamber value becomes the variable-name suffix
	reshape wide elections_run, i(CandId) j(chamber) string

	save ../data_output/elections_run, replace

project ,  creates("../data_output/elections_run.dta")







**create dataset with legislators
	use ../data_output/candidates.dta, clear

	* legislators are the candidate-year records flagged as winners
	keep if win==1

	* number each legislator's wins within a chamber from first to last
	sort  CandId chamber year
	bys CandId chamber: gen terms_in_office=_n

	* Side file: a ";"-terminated list of the election years won, one row per
	* CandId-chamber. preserve/restore keeps the winner records in memory.
	preserve

		keep state CandId chamber terms_in_office year
		sum terms_in_office
		local max_terms=r(max) //max is the number of terms of the legislator who served most terms
		* one column per win: year1, year2, ...
		reshape wide year, i(CandId chamber) j(terms_in_office)
		tostring year*, replace
		forval i=1/`max_terms' {
			* an unused slot (".") becomes empty; a used slot gets a trailing ";"
			replace year`i' = "" if year`i'=="."
			replace year`i' = year`i' + ";" if year`i'!=""
		}
		egen elections_won = concat(year*)
		drop year*
		save ../data_output/elections_won, replace
* declare the output to the project build, like every other file this script saves
project ,  creates("../data_output/elections_won.dta")

	restore
	
	
	* Label every winner record with the legislative term it belongs to.
	keep win state chamber party CandId year TermLengthActual  CandName terms_in_office district vtsh

	* inoffice is the calendar year after the election, stored as a string so it
	* can be matched against the year patterns below
	gen inoffice = year+1
	tostring inoffice, replace
	gen term = ""

	* Default: two-year terms "1985-1986", "1987-1988", ..., "2015-2016".
	* Records whose inoffice falls outside 1985-2016 keep an empty term.
	forvalues first = 1985(2)2015 {
		local second = `first' + 1
		replace term = "`first'-`second'" if regexm(inoffice, "`first'|`second'")
	}

	* Louisiana override: four-year terms "1988-1991", "1992-1995", ..., "2016-2019".
	* The year patterns are generated rather than typed out: the hand-written
	* 1988-1991 pattern listed 1990 twice and never matched 1991, so such records
	* kept the two-year label "1991-1992".
	forvalues first = 1988(4)2016 {
		local y2 = `first' + 1
		local y3 = `first' + 2
		local last = `first' + 3
		replace term = "`first'-`last'" if regexm(inoffice, "`first'|`y2'|`y3'|`last'") & regexm(lower(state), "la")
	}

	
	
	* Attach the elections-won list (matched on CandId and chamber) and the
	* elections-run lists (matched on CandId). Only records matched in both the
	* data in memory and the using file are kept, and no _merge variable is left behind.
	merge m:1 CandId chamber using ../data_output/elections_won, keep(match) nogenerate
	merge m:1 CandId using ../data_output/elections_run, keep(match) nogenerate

	save ../data_output/legislators_klarner.dta, replace

project ,  creates("../data_output/legislators_klarner.dta")
	
	
	
	
		
		
